Wisconsin Prognosis

Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

The data

dataBreast <- read.csv("~/GitHub/RISKPLOTS/DATA/wpbc.data", header=FALSE)
table(dataBreast$V2)
## 
##   N   R 
## 151  47
rownames(dataBreast) <- dataBreast$V1
dataBreast$V1 <- NULL
dataBreast$status <- 1*(dataBreast$V2=="R")
dataBreast$V2 <- NULL
dataBreast$time <- dataBreast$V3
dataBreast$V3 <- NULL
dataBreast <- sapply(dataBreast,as.numeric)
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
dataBreast <- as.data.frame(dataBreast[complete.cases(dataBreast),])
table(dataBreast$status)
## 
##   0   1 
## 148  46

Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=dataBreast)

[++++]

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy r.Accuracy
V24 0.046939 1.01 1.05 1.08 0.598 0.237
V26 0.004725 1.00 1.00 1.01 0.593 0.237
V27 0.000242 1.00 1.00 1.00 0.608 0.237
V34 0.011942 1.00 1.01 1.02 0.634 0.237
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI
V24 0.598 0.609 0.5 0.609 0.0619 0.437 2.87
V26 0.593 0.598 0.5 0.598 0.0626 0.393 2.77
V27 0.608 0.608 0.5 0.608 0.0563 0.434 2.76
V34 0.634 0.618 0.5 0.618 0.0320 0.471 2.42
  z.NRI Delta.AUC Frequency
V24 2.67 0.1091 1
V26 2.38 0.0983 1
V27 2.63 0.1084 1
V34 2.85 0.1178 1

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

index <- predict(ml,dataBreast)
timeinterval <- 2*mean(subset(dataBreast,status==1)$time)

h0 <- sum(dataBreast$status & dataBreast$time <= timeinterval)
h0 <- h0/sum((dataBreast$time > timeinterval) | (dataBreast$status==1))
pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.323 51.1
rdata <- cbind(dataBreast$status,ppoisGzero(index,h0))
rownames(rdata) <- rownames(dataBreast)

rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=dataBreast$time,
                     title="Raw Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

As we can see the Observed probability as well as the Time vs. Events are not calibrated.

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.42044 0.2640 0.1655 1.61e-01 0.497469
RR 2.18301 2.2857 4.3220 2.50e+01 2.307692
RR_LCI 1.30105 1.3037 0.6348 5.22e-02 1.275480
RR_UCI 3.66282 4.0074 29.4258 1.20e+04 4.175246
SEN 0.26087 0.6957 0.9783 1.00e+00 0.152174
SPE 0.89865 0.5608 0.1081 6.76e-02 0.952703
BACC 0.57976 0.6282 0.5432 5.34e-01 0.552438
NetBenefit 0.00577 0.0448 0.0971 1.00e-01 0.000371
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.823 0.603 1.1 0.203
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.01 1.01 0.957 1.07
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.92 0.92 0.911 0.928
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.68 0.68 0.605 0.752
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.637 0.546 0.728
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.261 0.143 0.411
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.418
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 11.608565 on 1 degrees of freedom, p = 0.000656
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 167 34 41.1 1.23 11.6
class=1 27 12 4.9 10.27 11.6

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataBreast,"status","time")

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.302 0.938 48.3

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(dataBreast$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=dataBreast$time,
                     title="Calibrated Train: Breast",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.4004 0.2498 0.156 1.52e-01 0.50187
RR 2.1830 2.2857 4.322 2.50e+01 2.49545
RR_LCI 1.3011 1.3037 0.635 5.22e-02 1.36264
RR_UCI 3.6628 4.0074 29.426 1.20e+04 4.57001
SEN 0.2609 0.6957 0.978 1.00e+00 0.13043
SPE 0.8986 0.5608 0.108 6.76e-02 0.96622
BACC 0.5798 0.6282 0.543 5.34e-01 0.54833
NetBenefit 0.0102 0.0534 0.106 1.09e-01 0.00497
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.83 0.607 1.11 0.226
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.02 1.02 0.962 1.08
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.945 0.945 0.936 0.953
pander::pander(t(rrAnalysisTrain$c.index$cstatCI),caption="C. Index")
C. Index
mean.C Index median lower upper
0.68 0.679 0.595 0.759
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.637 0.546 0.728
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.261 0.143 0.411
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.398
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 11.608565 on 1 degrees of freedom, p = 0.000656
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 167 34 41.1 1.23 11.6
class=1 27 12 4.9 10.27 11.6

Cross-Validation

Here we use the estimated h0 and timeinterval from the full set

rcv <- randomCV(theData=dataBreast,
                theOutcome = Surv(time,status)~1,
                fittingFunction=BSWiMS.model, 
                trainFraction = 0.9,
                repetitions=100,
                classSamplingType = "Pro"
         )

.[++++].[++++].[+++].[++].[+++].[++++].[++++].[+++++].[+++].[++++++]10 Tested: 134 Avg. Selected: 4.3 Min Tests: 1 Max Tests: 4 Mean Tests: 1.492537 . MAD: 0.4885734

.[+++++].[++++++].[+++].[+++++].[++++++].[–].[++++].[++++].[+].[+++++]20 Tested: 167 Avg. Selected: 4.3 Min Tests: 1 Max Tests: 7 Mean Tests: 2.39521 . MAD: 0.4898076

.[++++++++].[+++].[+++].[++++++].[++++++].[+++].[++++++].[++++].[+++++].[+++]30 Tested: 189 Avg. Selected: 4.533333 Min Tests: 1 Max Tests: 8 Mean Tests: 3.174603 . MAD: 0.4895265

.[+++].[+++++++].[+++++++].[+++++].[+].[++].[++++].[++++].[+++].[++++++]40 Tested: 191 Avg. Selected: 4.6 Min Tests: 1 Max Tests: 10 Mean Tests: 4.188482 . MAD: 0.4876964

.[+++].[++++++].[++++].[+++++++].[+++++++].[++++++].[+++++++].[+].[++].[+++++]50 Tested: 193 Avg. Selected: 4.7 Min Tests: 1 Max Tests: 12 Mean Tests: 5.181347 . MAD: 0.4862951

.[–].[++++].[++].[-++++++].[++].[++++++++].[+++++++].[+++].[+].[++++]60 Tested: 193 Avg. Selected: 4.566667 Min Tests: 1 Max Tests: 12 Mean Tests: 6.217617 . MAD: 0.4867237

.[+++].[+++++].[++++].[+++++].[+++++++].[+].[++++].[++++++].[++++].[++++]70 Tested: 194 Avg. Selected: 4.6 Min Tests: 1 Max Tests: 15 Mean Tests: 7.216495 . MAD: 0.4852746

.[++++++++].[+++++++].[+].[++++++].[+++].[++++].[++++].[++++].[++++].[+++++]80 Tested: 194 Avg. Selected: 4.6625 Min Tests: 2 Max Tests: 15 Mean Tests: 8.247423 . MAD: 0.4849865

.[+++].[+++].[++++].[+++++++].[++].[++].[+++].[+++++].[+++].[++++]90 Tested: 194 Avg. Selected: 4.566667 Min Tests: 2 Max Tests: 17 Mean Tests: 9.278351 . MAD: 0.4846336

.[++++].[++++++].[++++].[++++].[++++].[+++].[+++++++++].[+++].[++++].[+++]100 Tested: 194 Avg. Selected: 4.61 Min Tests: 2 Max Tests: 21 Mean Tests: 10.30928 . MAD: 0.4838379

stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]

bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)

rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names

rrAnalysisTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=times,
                     title="Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Cross-Validation Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.398 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.3982 0.234 0.156 1.37e-01 0.5029
RR 1.5185 2.256 2.529 1.22e+01 1.5679
RR_LCI 0.8480 1.246 0.663 2.62e-02 0.7388
RR_UCI 2.7191 4.087 9.651 5.65e+03 3.3277
SEN 0.2174 0.739 0.957 1.00e+00 0.1087
SPE 0.8649 0.500 0.122 3.38e-02 0.9392
BACC 0.5411 0.620 0.539 5.17e-01 0.5239
NetBenefit -0.0167 0.059 0.103 1.20e-01 -0.0212
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.801 0.587 1.07 0.146
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
0.992 0.991 0.93 1.05
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.874 0.873 0.858 0.887
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.653 0.652 0.571 0.728
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.6 0.507 0.694
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.217 0.109 0.364
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.872 0.807 0.921
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.398
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 3.671181 on 1 degrees of freedom, p = 0.055361
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 165 36 40.27 0.453 3.67
class=1 29 10 5.73 3.188 3.67

Calibrating the test results

rdatacv <- cbind(status,prob,times)
calprob <- CalibrationProbPoissonRisk(rdatacv)

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.32 0.992 49.3
timeinterval <- calprob$timeInterval;

rdata <- cbind(status,calprob$prob)


rrAnalysisTest <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=times,
                     title="Calibrated Test: Breast",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

### Calibrated Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.4239 0.2319 0.155 1.36e-01 0.4999
RR 1.4912 2.2562 2.529 1.22e+01 1.5679
RR_LCI 0.7931 1.2456 0.663 2.62e-02 0.7388
RR_UCI 2.8038 4.0866 9.651 5.65e+03 3.3277
SEN 0.1739 0.7391 0.957 1.00e+00 0.1087
SPE 0.8919 0.5000 0.122 3.38e-02 0.9392
BACC 0.5329 0.6196 0.539 5.17e-01 0.5239
NetBenefit -0.0194 0.0601 0.104 1.21e-01 -0.0206
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.825 0.604 1.1 0.203
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.02 1.02 0.963 1.08
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.886 0.886 0.87 0.9
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.653 0.653 0.568 0.732
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.6 0.507 0.694
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.174 0.0782 0.314
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.426
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 3.563064 on 1 degrees of freedom, p = 0.059079
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 171 38 41.71 0.33 3.56
class=1 23 8 4.29 3.21 3.56